gorgonia.org/gorgonia@v0.9.17/cuda modules/src/elembinop.cu (about)

     1  #define _USE_MATH_DEFINES
     2  #include <math.h>
     3  
     4  #define THREADID \
     5  	int blockId = blockIdx.x + blockIdx.y * gridDim.x + gridDim.x * gridDim.y * blockIdx.z;\
     6  	int idx = blockId * (blockDim.x * blockDim.y * blockDim.z) + (threadIdx.z * (blockDim.x * blockDim.y)) + (threadIdx.y * blockDim.x) + threadIdx.x;
     7  
     8  #define CHECKSIZE \
     9  	if (idx >= size) { \
    10  		return; \
    11  	}
    12  
    13  #define VVBINOP(name, t, type, op)\
    14  	__global__ void  name ##_vv_ ##t(type* A, type* B, int size) { \
    15  		THREADID \
    16  		CHECKSIZE \
    17  		A[idx] = A[idx] op B[idx];}
    18  
    19  #define VSBINOP(name, t, type, op)\
    20  	__global__ void  name ##_vs_ ##t(type* A, type* B, int size) { \
    21  		THREADID \
    22  		CHECKSIZE \
    23  		A[idx] = A[idx] op B[0];}
    24  
    25  #define SVBINOP(name, t, type, op)\
    26  	__global__ void  name ##_sv_ ##t(type* A, type* B, int size) { \
    27  		THREADID \
    28  		CHECKSIZE \
    29  		B[idx] = A[0] op B[idx];}
    30  
    31  #define SSBINOP(name, t, type, op)\
    32  	__global__ void  name ##_ss_ ##t(type* A, type* B, int size) { \
    33  		THREADID \
    34  		CHECKSIZE \
    35  		A[0] = A[0] op B[0];}
    36  
    37  /* VECTOR-VECTOR BIN OP */
    38  
    39  extern "C" { VVBINOP(add, f64, double, +) }
    40  extern "C" { VVBINOP(add, f32, float, +) }
    41  
    42  extern "C" { VVBINOP(sub, f64, double, -) }
    43  extern "C" { VVBINOP(sub, f32, float, -) }
    44  
    45  extern "C" { VVBINOP(mul, f64, double, *) }
    46  extern "C" { VVBINOP(mul, f32, float, *) }
    47  
    48  extern "C" { VVBINOP(div, f64, double, /) }
    49  extern "C" { VVBINOP(div, f32, float, /) }
    50  
    51  extern "C" { VVBINOP(gt, f64, double, >)}
    52  extern "C" { VVBINOP(gt, f32, float, >)}
    53  
    54  extern "C" { VVBINOP(gte, f64, double, >=)}
    55  extern "C" { VVBINOP(gte, f32, float, >=)}
    56  
    57  extern "C" { VVBINOP(lt, f64, double, <)}
    58  extern "C" { VVBINOP(lt, f32, float, <)}
    59  
    60  extern "C" { VVBINOP(lte, f64, double, <=)}
    61  extern "C" { VVBINOP(lte, f32, float, <=)}
    62  
    63  extern "C" { VVBINOP(eq, f64, double, ==)}
    64  extern "C" { VVBINOP(eq, f32, float, ==)}
    65  
    66  extern "C" { VVBINOP(ne, f64, double, !=)}
    67  extern "C" { VVBINOP(ne, f32, float, !=)}
    68  
    69  
    70  /* VECTOR-SCALAR BIN OP */
    71  
    72  extern "C" { VSBINOP(add, f64, double, +) }
    73  extern "C" { VSBINOP(add, f32, float, +) }
    74  
    75  extern "C" { VSBINOP(sub, f64, double, -) }
    76  extern "C" { VSBINOP(sub, f32, float, -) }
    77  
    78  extern "C" { VSBINOP(mul, f64, double, *) }
    79  extern "C" { VSBINOP(mul, f32, float, *) }
    80  
    81  extern "C" { VSBINOP(div, f64, double, /) }
    82  extern "C" { VSBINOP(div, f32, float, /) }
    83  
    84  extern "C" { VSBINOP(gt, f64, double, >)}
    85  extern "C" { VSBINOP(gt, f32, float, >)}
    86  
    87  extern "C" { VSBINOP(gte, f64, double, >=)}
    88  extern "C" { VSBINOP(gte, f32, float, >=)}
    89  
    90  extern "C" { VSBINOP(lt, f64, double, <)}
    91  extern "C" { VSBINOP(lt, f32, float, <)}
    92  
    93  extern "C" { VSBINOP(lte, f64, double, <=)}
    94  extern "C" { VSBINOP(lte, f32, float, <=)}
    95  
    96  extern "C" { VSBINOP(eq, f64, double, ==)}
    97  extern "C" { VSBINOP(eq, f32, float, ==)}
    98  
    99  extern "C" { VSBINOP(ne, f64, double, !=)}
   100  extern "C" { VSBINOP(ne, f32, float, !=)}
   101  
   102  /* SCALAR-VECTOR BIN OP */
   103  
   104  extern "C" { SVBINOP(add, f64, double, +) }
   105  extern "C" { SVBINOP(add, f32, float, +) }
   106  
   107  extern "C" { SVBINOP(sub, f64, double, -) }
   108  extern "C" { SVBINOP(sub, f32, float, -) }
   109  
   110  extern "C" { SVBINOP(mul, f64, double, *) }
   111  extern "C" { SVBINOP(mul, f32, float, *) }
   112  
   113  extern "C" { SVBINOP(div, f64, double, /) }
   114  extern "C" { SVBINOP(div, f32, float, /) }
   115  
   116  extern "C" { SVBINOP(gt, f64, double, >) }
   117  extern "C" { SVBINOP(gt, f32, float, >) }
   118  
   119  extern "C" { SVBINOP(gte, f64, double, >=) }
   120  extern "C" { SVBINOP(gte, f32, float, >=) }
   121  
   122  extern "C" { SVBINOP(lt, f64, double, <) }
   123  extern "C" { SVBINOP(lt, f32, float, <) }
   124  
   125  extern "C" { SVBINOP(lte, f64, double, <=) }
   126  extern "C" { SVBINOP(lte, f32, float, <=) }
   127  
   128  extern "C" { SVBINOP(eq, f64, double, ==) }
   129  extern "C" { SVBINOP(eq, f32, float, ==) }
   130  
   131  extern "C" { SVBINOP(ne, f64, double, !=) }
   132  extern "C" { SVBINOP(ne, f32, float, !=) }
   133  
   134  /* SCALAR-SCALAR BIN OP */	
   135  
   136  extern "C" { SSBINOP(add, f64, double, +) }
   137  extern "C" { SSBINOP(add, f32, float, +) }
   138  
   139  extern "C" { SSBINOP(sub, f64, double, -) }
   140  extern "C" { SSBINOP(sub, f32, float, -) }
   141  
   142  extern "C" { SSBINOP(mul, f64, double, *) }
   143  extern "C" { SSBINOP(mul, f32, float, *) }
   144  
   145  extern "C" { SSBINOP(div, f64, double, /) }
   146  extern "C" { SSBINOP(div, f32, float, /) }
   147  
   148  extern "C" { SSBINOP(gt, f64, double, >)}
   149  extern "C" { SSBINOP(gt, f32, float, >)}
   150  
   151  extern "C" { SSBINOP(gte, f64, double, >=)}
   152  extern "C" { SSBINOP(gte, f32, float, >=)}
   153  
   154  extern "C" { SSBINOP(lt, f64, double, <)}
   155  extern "C" { SSBINOP(lt, f32, float, <)}
   156  
   157  extern "C" { SSBINOP(lte, f64, double, <=)}
   158  extern "C" { SSBINOP(lte, f32, float, <=)}
   159  
   160  extern "C" { SSBINOP(eq, f64, double, ==)}
   161  extern "C" { SSBINOP(eq, f32, float, ==)}
   162  
   163  extern "C" { SSBINOP(ne, f64, double, !=)}
   164  extern "C" { SSBINOP(ne, f32, float, !=)}
   165  
   166  /* FUNCTION BIN OP */
   167  
   168  #define VVFNBINOP(name, t, type, op)\
   169  	__global__ void  name ##_vv_ ##t(type* A, type* B, int size) { \
   170  		THREADID \
   171  		CHECKSIZE \
   172  		A[idx] = op(A[idx], B[idx]);}
   173  
   174  #define VSFNBINOP(name, t, type, op)\
   175  	__global__ void  name ##_vs_ ##t(type* A, type* B, int size) { \
   176  		THREADID \
   177  		CHECKSIZE \
   178  		A[idx] = op(A[idx], B[0]);}
   179  
   180  #define SVFNBINOP(name, t, type, op)\
   181  	__global__ void  name ##_sv_ ##t(type* A, type* B, int size) { \
   182  		THREADID \
   183  		CHECKSIZE \
   184  		B[idx] = op(A[0], B[idx]);}
   185  
   186  #define SSFNBINOP(name, t, type, op)\
   187  	__global__ void  name ##_ss_ ##t(type* A, type* B, int size) { \
   188  		THREADID \
   189  		CHECKSIZE \
   190  		A[0] = op(A[0], B[0]);}
   191  
   192  extern "C" { VVFNBINOP(pow, f64, double, pow) }
   193  extern "C" { VVFNBINOP(pow, f32, float, powf) }
   194  extern "C" { VSFNBINOP(pow, f64, double, pow) }
   195  extern "C" { VSFNBINOP(pow, f32, float, powf) }
   196  extern "C" { SVFNBINOP(pow, f64, double, pow) }
   197  extern "C" { SVFNBINOP(pow, f32, float, powf) }
   198  extern "C" { SSFNBINOP(pow, f64, double, pow) }
   199  extern "C" { SSFNBINOP(pow, f32, float, powf) }
   200  
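/*
Disabled: element-wise modulo kernels.

NOTE: modf/modff are not modulo functions. They split a value into its
integral and fractional parts and take a pointer as their second argument,
so they do not fit the op(a, b) call shape used by the *FNBINOP macros.
The two-argument floating-point remainder functions are fmod/fmodf;
re-enabling these kernels would need those instead.

extern "C" { VVFNBINOP(mod, f64, double, modf) }
extern "C" { VVFNBINOP(mod, f32, float, modff) }
extern "C" { VSFNBINOP(mod, f64, double, modf) }
extern "C" { VSFNBINOP(mod, f32, float, modff) }
extern "C" { SVFNBINOP(mod, f64, double, modf) }
extern "C" { SVFNBINOP(mod, f32, float, modff) }
extern "C" { SSFNBINOP(mod, f64, double, modf) }
extern "C" { SSFNBINOP(mod, f32, float, modff) }
*/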